*************************************************
* Purpose -- To create 1990s data for AK, RI and DE 
* for restaurant sub-sectors.
************************************************
clear all
* RUN 00_path_master.do FIRST TO GET FILEPATHS

************************************************
* Getting 1990s data and constructing 7221 + 7222
************************************************

* keeping only agg lvl 56, industry codes 7221 and 7222 for the two states
import delimited using "$raw/qcew/1990.annual.singlefile.csv", clear
keep if (agglvl_code == 56) & inlist(industry_code,"7221","7222") & inlist(area_fips,"02000","10000","44000")

* randomly checking some files has revealed that - Rhode Island 
* has 7221 and 7222 for ownership code 5 only; Delaware has 7221 for ownership code
* 1 and 5, and only 5 for 7222. We just want to make sure every state in every year
* has own_code == 5 data for both 7221 and 7222. We will still add the own_code == 1
* as we are doing for the rest of the analysis, but it is negligible. What we don't
* want is if a state only has own_code == 1, because that is a very small portion of the total

bys area_fips industry_code: egen own_code_max = max(own_code)
assert own_code_max == 5

forvalues year = 1991/2001 {
	display "Working with `year'"
	preserve
		import delimited using "$raw/qcew/`year'.annual.singlefile.csv", clear
		keep if (agglvl_code == 56) & inlist(industry_code,"7221","7222") & inlist(area_fips,"02000","10000","44000")
		
		
		bys area_fips industry_code: egen own_code_max = max(own_code)
		assert own_code_max == 5
		
		tempfile year
		save `year'
	restore
	
	append using `year', force
}

* collapsing to get 7221 + 7222
collapse (sum) annual_avg_estabs annual_avg_emplvl total_annual_wages, by(area_fips year)


************************************************
* Getting 2001 722 data
************************************************

frame create year2001
frame change year2001

import delimited using "$raw/qcew/2001.annual.singlefile.csv", clear
keep if (agglvl_code == 55) & industry_code == "722" & inlist(area_fips,"02000","10000","44000")
collapse (sum) annual_avg_estabs annual_avg_emplvl total_annual_wages, by(area_fips year)

foreach var in annual_avg_estabs annual_avg_emplvl total_annual_wages {
	rename `var' `var'_722
}

tempfile 722
save `722'

frame change default

merge 1:1 area_fips year using `722'

* creating factor for scaling

local i = 1
foreach var in annual_avg_estabs annual_avg_emplvl total_annual_wages {
	bys area_fips (year): gen factor`i' = `var'_722[_N]/`var'[_N]
	local ++i
}

* scale

local j = 1
foreach var in annual_avg_estabs annual_avg_emplvl total_annual_wages {
	replace `var' = `var'*factor`j'
	local ++j
}

drop if year == 2001

foreach var in annual_avg_estabs annual_avg_emplvl total_annual_wages {
	rename `var' `var'55
}

keep area_fips year *55

save "$clean/qcew/AK_DE_RI_90s_scaled.dta", replace
